#delimit ;

/* Turn off output paging so the do-file runs unattended, and write the log as plain text.   */
set more off;
set logtype text;

/* Close any log left open by an earlier run that aborted before reaching "log close".        */
/* Without this, the "log using" below stops with a "log file already open" error.           */
/* capture suppresses the error that "log close" raises when no log is open.                  */
capture log close;

log using "/home/dnc2101/Accidental_Deaths/Vital_Stats/Summary_Stats_Types_Accidents/make_age_group_vars.log", replace;




/********************************************************************************************************************
*
* This do file takes the Vital Stats Mortality data, and creates counts of the number of accidental deaths, other
* than auto accidents and drug overdoses, that occur amongst different age groups in each state in each year.  These
* counts are the numerators for the dependent variables in the regressions shown in Table 5 of the Joint and Several
* Liability and Accidental Deaths paper.  This do file creates these counts for the years 1981 through 1998, the
* years of the data that use ICD-9 coding on cause of death.  A separate do file does the same exact task for the
* years 1999 through 2004, the years of data which use ICD-10 coding on cause of death.
*
* NOTE: this header is deliberately one single block comment (opened on the first line, closed on the last).
* Because the command delimiter in this file is the semicolon, a comment that merely starts with a star ends
* at the first semicolon it contains, and whatever follows would be executed as a command.  Inside a block
* comment the delimiter is ignored, so the header can never swallow or break the commands that follow it.
*
* Program by Dan Carvell, written between Fall 2008 and Spring 2010.
*
*********************************************************************************************************************/






/* Load the appended 1981-1998 injury-death records that the rest of this do file works from. */
use "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/Injury_Deaths_81_98.dta", clear;




/* Step 1: restrict the sample to accidental deaths other than drug overdoses and auto accidents.        */
/* Deaths are classified with the cause-of-death recode variable ucr282.  The meaning attached to each   */
/* cutoff below is taken from the original author's notes.  The resulting subset is saved further down   */
/* so that every age-group calculation can start from it instead of repeating these drops.               */

/* Anything coded below 30000 is not an injury death. */
drop if ucr282 < 30000;

/* Code 31700 is drug overdoses. */
drop if ucr282 == 31700;

/* Codes above 33600 are intentional injuries and injuries of unknown intent.                            */
/* Stata treats a missing value as larger than any number, so records with missing ucr282 go here too.   */
drop if ucr282 > 33600;

/* Codes 30300 through 31100 (inclusive) are auto accidents. */
drop if inrange(ucr282, 30300, 31100);

/* Inspect what is left to confirm the drops above did what was intended. */
summarize ucr282;

tabulate ucr282;


/* Step 2: give the key variables the names used in the author's other datasets, then save the subset    */
/* of accidental deaths (no auto accidents, no drug overdoses) for reuse by each age-group section.      */

/* State FIPS code is stored as fipsstr here.  The other datasets call it statefip. */
generate statefip = fipsstr;

/* The calendar year should be called year, but a variable of that name already exists.  Discard it     */
/* and rebuild year from datayear.                                                                      */
/* NOTE(review): adding 1900 assumes datayear holds a two-digit year in every record - confirm that     */
/* the summarize output below runs from 1981 to 1998.                                                   */
drop year;

generate year = datayear + 1900;

summarize year;

/* Shrink storage types before writing to disk. */
compress;

save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/accidents_no_auto_no_OD_81_98.dta", replace;








/* Step 3: build state-year counts of accidental deaths within each age group.                           */
/* The count variables must NOT share a name with the matching state-year population variables (the      */
/* denominators of the rates, built from the Census data), hence the _count suffix.                      */

/* Age coding in this data, per the original author's notes:                                             */
/*   - decedents aged 0 to 364 days have ager52 between 01 and 22, inclusive                             */
/*   - decedents aged 0 to 364 days also have age between 200 and 699                                    */
/*   - decedents aged 1 to 99 years have age between 1 and 99                                            */
/*   - decedents aged 100 years and above have age between 100 and 199                                   */

/* Decedents aged 0 to 5.  The matching population (denominator) variable is called ages_0_to_5.         */
/* This section uses the data still in memory from the save above.                                       */
/* Indicator is 1 for infants (identified through ager52) and for ages 1 to 5 years, 0 otherwise.        */
/* inrange() returns 0 for missing values, so records with missing age information are not counted.      */
generate ages_0_to_5_count = ( inrange(ager52, 1, 22) | inrange(age, 1, 5) );

/* Sum the indicator within each state-year cell. */
collapse (sum) ages_0_to_5_count, by(statefip year);

/* Label AFTER collapsing: collapse replaces variable labels with "(sum) varname", so a label attached   */
/* before the collapse never reaches the saved file.                                                    */
label variable ages_0_to_5_count "count of accidents amongst ages 0 to 5";

/* Sort on the merge keys so the later merge with the other count files goes OK. */
sort statefip year;

save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_0_to_5_Counts_81_98.dta", replace;





/* Decedents aged 18 to 64.  The matching population (denominator) variable is called ages_18_to_64.     */

/* Start again from the saved subset of accidental deaths (no auto accidents, no drug overdoses). */
use "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/accidents_no_auto_no_OD_81_98.dta", clear;

/* Indicator is 1 for ages 18 to 64 years, 0 otherwise.  inrange() returns 0 when age is missing. */
generate ages_18_to_64_count = inrange(age, 18, 64);

/* Sum the indicator within each state-year cell. */
collapse (sum) ages_18_to_64_count, by(statefip year);

/* Label AFTER collapsing: collapse replaces variable labels with "(sum) varname", so a label attached   */
/* before the collapse never reaches the saved file.                                                    */
label variable ages_18_to_64_count "count of accidents amongst ages 18 to 64";

/* Sort on the merge keys so the later merge with the other count files goes OK. */
sort statefip year;

save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_18_to_64_Counts_81_98.dta", replace;




/* Decedents aged 6 to 17.  The matching population (denominator) variable is called ages_6_to_17.       */

/* Start again from the saved subset of accidental deaths (no auto accidents, no drug overdoses). */
use "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/accidents_no_auto_no_OD_81_98.dta", clear;

/* Indicator is 1 for ages 6 to 17 years, 0 otherwise.  inrange() returns 0 when age is missing. */
generate ages_6_to_17_count = inrange(age, 6, 17);

/* Sum the indicator within each state-year cell. */
collapse (sum) ages_6_to_17_count, by(statefip year);

/* Label AFTER collapsing: collapse replaces variable labels with "(sum) varname", so a label attached   */
/* before the collapse never reaches the saved file.                                                    */
label variable ages_6_to_17_count "count of accidents amongst ages 6 to 17";

/* Sort on the merge keys so the later merge with the other count files goes OK. */
sort statefip year;

save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_6_to_17_Counts_81_98.dta", replace;





/* Decedents aged 65 and above.  The matching population (denominator) variable is called                */
/* age_65_or_above.                                                                                      */

/* Start again from the saved subset of accidental deaths (no auto accidents, no drug overdoses). */
use "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/accidents_no_auto_no_OD_81_98.dta", clear;

/* Indicator is 1 for age codes 65 to 199, 0 otherwise.  The upper bound of 199 is deliberate: per the   */
/* author's notes on the age coding, codes 100 to 199 are decedents aged 100 and above, while codes of   */
/* 200 and up are infants.  inrange() returns 0 when age is missing.                                     */
generate age_65_or_above_count = inrange(age, 65, 199);

/* Sum the indicator within each state-year cell. */
collapse (sum) age_65_or_above_count, by(statefip year);

/* Label AFTER collapsing: collapse replaces variable labels with "(sum) varname", so a label attached   */
/* before the collapse never reaches the saved file.                                                    */
label variable age_65_or_above_count "count of accidents amongst ages 65 and above";

/* Sort on the merge keys so the later merge with the other count files goes OK. */
sort statefip year;

save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Age_65_or_Above_Counts_81_98.dta", replace;



















/* Step 4: combine the four age-group count files into a single state-year dataset.                      */
/* Each merge matches on statefip and year.  The unique option requires those keys to identify           */
/* observations uniquely in both files.  Only state-years found in both files (_merge equal to 3) are    */
/* kept, and _merge is removed before the next merge so that it can be created again.                    */

/* Master file: ages 0 to 5. */
use "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_0_to_5_Counts_81_98.dta", clear;
sort statefip year;

/* Add ages 6 to 17. */
merge statefip year using "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_6_to_17_Counts_81_98.dta", unique;
keep if _merge == 3;
drop _merge;

/* Add ages 18 to 64. */
sort statefip year;
merge statefip year using "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Ages_18_to_64_Counts_81_98.dta", unique;
keep if _merge == 3;
drop _merge;

/* Add ages 65 and above. */
sort statefip year;
merge statefip year using "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/Age_65_or_Above_Counts_81_98.dta", unique;
keep if _merge == 3;
drop _merge;








/* Step 5: check the merged counts, remove rows without a usable state code, save, and close the log.    */

/* Summary statistics before cleaning, to make sure everything looks OK. */
summarize;

/* Remove state-years whose state FIPS code is 0 or missing.  The original author's note records 17      */
/* observations with statefip equal to 0 and one observation with statefip missing.                      */
drop if statefip == 0;
drop if statefip == .;

/* Summary statistics again, after the drops. */
summarize;

save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Cleaned_Datasets/All_Different_Age_Group_Counts_81_98.dta", replace;

log close;
